from urllib.request import urlopen


# Extract the text located between two marker strings
def substr(str, str1, str2):
    """Return the part of *str* between *str1* and the next *str2*.

    The closing marker is searched for only after the end of the opening
    marker, so an earlier stray occurrence of *str2* cannot truncate or
    empty the result.

    Args:
        str: Text to search. The name shadows the builtin but is kept so
            existing keyword callers continue to work.
        str1: Opening marker; the result starts right after it.
        str2: Closing marker; the result ends right before it.

    Returns:
        The text between the markers, or ``None`` when either marker is
        missing or the arguments are not searchable strings.
    """
    try:
        # The wanted text begins immediately after the opening marker.
        begin = str.index(str1) + len(str1)
        # Start the search at ``begin`` so only a closing marker that
        # follows the opening one is accepted.
        end = str.index(str2, begin)
    except (ValueError, TypeError, AttributeError):
        # ValueError: marker not found; TypeError/AttributeError: input is
        # None or otherwise not a string. All are reported as "no match".
        return None
    return str[begin:end]

# Clean up the extracted text so it reads more nicely (describes mreplace below)


def mreplace(content):
    """Extract the ``text_c`` block from a page and turn it into plain text.

    The block runs from the first ``<div id="text_c">`` up to (not
    including) the first ``</div>`` that follows it. Inside that block,
    ``<br />`` becomes a newline, ``&nbsp;`` becomes a space, and the
    remaining known wrapper tags/comments are removed.

    Args:
        content: HTML source of the page.

    Returns:
        The cleaned text, or ``None`` when the block cannot be located or
        *content* is not a string.
    """
    open_tag = "<div id=\"text_c\">"
    # Applied strictly in this order; each step works on the result of the
    # previous one.
    replacements = (
        ("<br />", "\n"),
        ("<div id=\"content\">", ""),
        ("&nbsp;", " "),
        ("<!-- 固定开始 -->", ""),
        ("</div>", ""),
        (open_tag, ""),
    )
    try:
        start = content.index(open_tag)
        # Search for the closing tag from the opening tag onwards.
        end = content.index("</div>", start)
        text = content[start:end]
        for old, new in replacements:
            text = text.replace(old, new)
    except (ValueError, TypeError, AttributeError):
        # Only "not found" / "not a string" are treated as a miss;
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
    return text


# Extract the title of the current chapter
def GetTitle(content):
    """Return the text enclosed by the ``<title>`` element of *content*.

    Delegates to ``substr``; the result is whatever ``substr`` yields for
    the ``<title>`` / ``</title>`` marker pair.
    """
    opening_tag, closing_tag = "<title>", "</title>"
    return substr(content, opening_tag, closing_tag)


# Get the link to the next page
def Getnext(content):
    """Return the ``href`` value of the ``nextLink`` anchor in *content*.

    Delegates to ``substr``, using the start of the anchor tag as the
    opening marker and the end of the tag plus its link text as the
    closing marker.
    """
    link_prefix = '<a id="nextLink" href="'
    link_suffix = '">下一章</a>'
    return substr(content, link_prefix, link_suffix)


def gethtml(url):
    """Fetch *url* and return its body decoded as UTF-8 text.

    Args:
        url: Anything accepted by ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Exception: Whatever ``urlopen`` raises for an unreachable or
            invalid URL is propagated unchanged.
    """
    # The context manager closes the response even if reading or decoding
    # fails, so the connection is not leaked.
    with urlopen(url) as response:
        # One read and one decode replace per-line decoding and repeated
        # string concatenation; the resulting text is the same.
        return response.read().decode('utf-8')
